import requests
from bs4 import BeautifulSoup
class Scrawler():
    """Fetch web pages and pull text, links or image URLs out of them.

    Pages are downloaded with ``requests`` (using a desktop-browser
    ``User-Agent`` header) and parsed with BeautifulSoup's ``html.parser``;
    elements are located with CSS selectors.
    """

    # Seconds to wait on the server. Without an explicit timeout a
    # ``requests`` call can block forever on an unresponsive host.
    DEFAULT_TIMEOUT = 10

    def __init__(self, timeout: float = DEFAULT_TIMEOUT):
        """Set up the request headers and the per-request timeout.

        Args:
            timeout: Seconds to wait for each HTTP request before
                ``requests`` raises a timeout error.
        """
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.39"}
        self.timeout = timeout

    def getCodes(self, url: str) -> str:
        """Download ``url`` and return the response body as text.

        Args:
            url: Address of the page to fetch.

        Returns:
            The decoded response body. HTTP error statuses are not raised
            here; the body of the error page is returned as-is.

        Raises:
            requests.exceptions.RequestException: On connection failure or
                timeout.
        """
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        # When the server declares no charset, requests falls back to
        # ISO-8859-1 for text/* responses, which garbles non-Latin pages.
        # Detect the encoding from the payload instead in that case.
        content_type = response.headers.get("Content-Type", "")
        if "charset" not in content_type.lower():
            response.encoding = response.apparent_encoding
        return response.text

    def _select(self, url: str, selector: str) -> list:
        """Fetch ``url`` and return the elements matching CSS ``selector``."""
        soup = BeautifulSoup(self.getCodes(url), 'html.parser')
        return soup.select(selector=selector)

    def _select_attr(self, url: str, selector: str, attr: str) -> list:
        """Return the ``attr`` value of every matched element that has it."""
        # Elements lacking the attribute (e.g. <a> without href) are skipped
        # rather than aborting the whole extraction with a KeyError.
        return [
            tag.attrs[attr]
            for tag in self._select(url, selector)
            if attr in tag.attrs
        ]

    def getTexts(self, url: str, selector: str) -> str:
        """Get the plain text of every element matching ``selector``.

        Args:
            url: Address of the page to fetch.
            selector: CSS selector identifying the elements.

        Returns:
            The ``str()`` representation of the list of element texts
            (e.g. ``"['first', 'second']"``), not the list itself.
        """
        return str([tag.text for tag in self._select(url, selector)])

    def getUrl(self, url: str, selector: str) -> list:
        """Get the hyperlink targets (``href``) of the matched elements.

        Args:
            url: Address of the page to fetch.
            selector: CSS selector identifying the link elements.

        Returns:
            List of ``href`` values; matched elements without an ``href``
            attribute are omitted.
        """
        return self._select_attr(url, selector, 'href')

    def getImgUrl(self, url: str, selector: str) -> list:
        """Get the image source addresses (``src``) of the matched elements.

        Args:
            url: Address of the page to fetch.
            selector: CSS selector identifying the image elements.

        Returns:
            List of ``src`` values; matched elements without a ``src``
            attribute are omitted.
        """
        return self._select_attr(url, selector, 'src')

    def getImgs(self, url: str, imgName: str) -> None:
        """Download the resource at ``url`` and save it to ``imgName``.

        Args:
            url: Address of the image (or any binary resource).
            imgName: Path of the file to write; overwritten if it exists.

        Raises:
            requests.exceptions.HTTPError: If the server answers with a
                4xx/5xx status.
            requests.exceptions.RequestException: On connection failure or
                timeout.
        """
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        # Fail before touching the disk so an error page is never saved
        # under the image's file name.
        response.raise_for_status()
        with open(imgName, 'wb') as f:
            f.write(response.content)